Code
library(duckdb)
# to start an in-memory database
con <- dbConnect(duckdb())Tony Duan
January 1, 2022

out <-
palmerpenguins::penguins %>%
# CAVEAT: factor columns are not supported yet
mutate(across(where(is.factor), as.character)) %>%
as_duckplyr_df() %>%
mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
filter(species != "Gentoo")┌───────────────────────────┐
│ HASH_GROUP_BY │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ #0 │
│ #1 │
│ mean(#2) │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ PROJECTION │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ species │
│ sex │
│ bill_area │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ PROJECTION │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ species │
│ sex │
│ bill_area │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ FILTER │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ (species != 'Gentoo') │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ EC: 68 │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ R_DATAFRAME_SCAN │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ data.frame │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ species │
│ bill_length_mm │
│ bill_depth_mm │
│ sex │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ EC: 344 │
└───────────────────────────┘
https://www.youtube.com/watch?v=V9GwSPjKMKw
https://github.com/duckdblabs/duckplyr/
https://github.com/duckdb/duckdb-r
https://duckdb.org/docs/api/r.html
---
title: "duckdb in R"
author: "Tony Duan"
date: "2022-01-01"
categories: [R]
image : "images/1.png"
execute:
warning: false
error: false
format:
html:
toc: true
toc-location: left
code-fold: show
code-tools: true
number-sections: true
code-block-bg: true
code-block-border-left: "#31BAE9"
---
{width="500"}
# duckdb
## connect
```{r}
library(duckdb)
# to start an in-memory database
con <- dbConnect(duckdb())
```
## write data into duckdb
```{r}
dbWriteTable(con, "iris_table", iris)
```
```{r}
data <- dbGetQuery(con, "SELECT * FROM iris_table limit 5")
```
```{r}
data
```
```{r}
dbDisconnect()
```
# duckplyr
```{r}
#install.packages("conflicted")
#install.packages("duckplyr")
library(conflicted)
library(duckplyr)
library(duckdb)
conflict_prefer("filter", "duckplyr")
```
```{r}
out <-
palmerpenguins::penguins %>%
# CAVEAT: factor columns are not supported yet
mutate(across(where(is.factor), as.character)) %>%
as_duckplyr_df() %>%
mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
filter(species != "Gentoo")
```
```{r}
class(out)
```
```{r}
names(out)
```
```{r}
out %>%
explain()
```
# reference
https://www.youtube.com/watch?v=V9GwSPjKMKw
https://github.com/duckdblabs/duckplyr/
https://github.com/duckdb/duckdb-r
https://duckdb.org/docs/api/r.html